{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T00:57:04.664661Z",
     "start_time": "2020-05-28T00:57:04.236349Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sentence #</th>\n",
       "      <th>Word</th>\n",
       "      <th>POS</th>\n",
       "      <th>Tag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1048565</th>\n",
       "      <td>Sentence: 47958</td>\n",
       "      <td>impact</td>\n",
       "      <td>NN</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048566</th>\n",
       "      <td>Sentence: 47958</td>\n",
       "      <td>.</td>\n",
       "      <td>.</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048567</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>Indian</td>\n",
       "      <td>JJ</td>\n",
       "      <td>B-gpe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048568</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>forces</td>\n",
       "      <td>NNS</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048569</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>said</td>\n",
       "      <td>VBD</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048570</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>they</td>\n",
       "      <td>PRP</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048571</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>responded</td>\n",
       "      <td>VBD</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048572</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>to</td>\n",
       "      <td>TO</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048573</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>the</td>\n",
       "      <td>DT</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048574</th>\n",
       "      <td>Sentence: 47959</td>\n",
       "      <td>attack</td>\n",
       "      <td>NN</td>\n",
       "      <td>O</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Sentence #       Word  POS    Tag\n",
       "1048565  Sentence: 47958     impact   NN      O\n",
       "1048566  Sentence: 47958          .    .      O\n",
       "1048567  Sentence: 47959     Indian   JJ  B-gpe\n",
       "1048568  Sentence: 47959     forces  NNS      O\n",
       "1048569  Sentence: 47959       said  VBD      O\n",
       "1048570  Sentence: 47959       they  PRP      O\n",
       "1048571  Sentence: 47959  responded  VBD      O\n",
       "1048572  Sentence: 47959         to   TO      O\n",
       "1048573  Sentence: 47959        the   DT      O\n",
       "1048574  Sentence: 47959     attack   NN      O"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm import tqdm, trange\n",
    "\n",
    "# Load the token-level NER table (one row per word).\n",
    "# Missing cells in every column are forward-filled from the row above;\n",
    "# presumably this propagates the sentence id, which the CSV appears to\n",
    "# leave blank on continuation rows (confirm against the raw file).\n",
    "# `fillna(method=\"ffill\")` is deprecated in pandas 2.x; `.ffill()` is the\n",
    "# equivalent spelling.\n",
    "data = pd.read_csv(\n",
    "    \"../datasets/ner/kaggle-entity-annotated-corpus/ner_dataset.csv\",\n",
    "    encoding=\"latin1\").ffill()\n",
    "data.tail(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T00:57:32.711641Z",
     "start_time": "2020-05-28T00:57:32.704306Z"
    }
   },
   "outputs": [],
   "source": [
    "class SentenceGetter(object):\n",
    "    \"\"\"Group a token-level NER table into per-sentence lists of triples.\n",
    "\n",
    "    `data` must provide the columns \"Sentence #\", \"Word\", \"POS\" and \"Tag\".\n",
    "    After construction:\n",
    "\n",
    "    * `grouped` is a Series indexed by the \"Sentence #\" value; each entry is\n",
    "      the list of (word, pos, tag) tuples of that sentence.\n",
    "    * `sentences` holds the same lists in `grouped` order, i.e. the sorted\n",
    "      order of the \"Sentence #\" strings (\"Sentence: 10\" sorts before\n",
    "      \"Sentence: 2\").\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, data):\n",
    "        self.n_sent = 1  # number of the sentence get_next() returns next\n",
    "        self.data = data\n",
    "        self.empty = False  # never updated in this class; kept for compatibility\n",
    "\n",
    "        def to_triples(group):\n",
    "            # Zip the three columns of one sentence into (word, pos, tag) tuples.\n",
    "            return list(zip(group[\"Word\"].tolist(), group[\"POS\"].tolist(),\n",
    "                            group[\"Tag\"].tolist()))\n",
    "\n",
    "        # Group by sentence. The value columns are selected explicitly so the\n",
    "        # grouping column itself is not handed to the aggregation function.\n",
    "        columns = [\"Word\", \"POS\", \"Tag\"]\n",
    "        self.grouped = self.data.groupby(\"Sentence #\")[columns].apply(to_triples)\n",
    "        self.sentences = list(self.grouped)\n",
    "\n",
    "    def get_next(self):\n",
    "        \"\"\"Return sentence number `n_sent` and advance, or None if it is absent.\"\"\"\n",
    "        try:\n",
    "            s = self.grouped[\"Sentence: {}\".format(self.n_sent)]\n",
    "        except KeyError:\n",
    "            # Only a missing sentence id means \"no more sentences\"; any other\n",
    "            # error is a real problem and must not be swallowed.\n",
    "            return None\n",
    "        self.n_sent += 1\n",
    "        return s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T00:59:35.082228Z",
     "start_time": "2020-05-28T00:59:32.002277Z"
    }
   },
   "outputs": [],
   "source": [
    "getter = SentenceGetter(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:00:28.125818Z",
     "start_time": "2020-05-28T01:00:28.111569Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sentence #\n",
       "Sentence: 1        [(Thousands, NNS, O), (of, IN, O), (demonstrat...\n",
       "Sentence: 10       [(Iranian, JJ, B-gpe), (officials, NNS, O), (s...\n",
       "Sentence: 100      [(Helicopter, NN, O), (gunships, NNS, O), (Sat...\n",
       "Sentence: 1000     [(They, PRP, O), (left, VBD, O), (after, IN, O...\n",
       "Sentence: 10000    [(U.N., NNP, B-geo), (relief, NN, O), (coordin...\n",
       "dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The data is organised by sentence; each sentence is a list of\n",
    "# (word, POS tag, entity tag) triples.\n",
    "getter.grouped.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:01:48.436159Z",
     "start_time": "2020-05-28T01:01:48.295843Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Thousands', 'of', 'demonstrators', 'have', 'marched', 'through', 'London', 'to', 'protest', 'the', 'war', 'in', 'Iraq', 'and', 'demand', 'the', 'withdrawal', 'of', 'British', 'troops', 'from', 'that', 'country', '.']\n"
     ]
    }
   ],
   "source": [
    "# Keep only the word of every (word, pos, tag) triple.\n",
    "sentences = [[word for word, _pos, _tag in sent] for sent in getter.sentences]\n",
    "print(sentences[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:01:50.394288Z",
     "start_time": "2020-05-28T01:01:50.345934Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['O', 'O', 'O', 'O', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'B-gpe', 'O', 'O', 'O', 'O', 'O']\n"
     ]
    }
   ],
   "source": [
    "# Keep only the entity tag of every (word, pos, tag) triple.\n",
    "labels = [[tag for _word, _pos, tag in sent] for sent in getter.sentences]\n",
    "print(labels[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:02:17.287519Z",
     "start_time": "2020-05-28T01:02:17.268863Z"
    }
   },
   "outputs": [],
   "source": [
    "# Build the tag vocabulary.\n",
    "# The tags are sorted so that the tag -> index mapping is identical on every\n",
    "# run; iterating a bare set() of strings can yield a different order in each\n",
    "# Python process, which would make saved predictions impossible to decode.\n",
    "\n",
    "tag_values = sorted(set(data[\"Tag\"].values))\n",
    "tag_values.append(\"PAD\")  # extra label assigned to padded positions\n",
    "tag2idx = {t: i for i, t in enumerate(tag_values)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:02:24.545028Z",
     "start_time": "2020-05-28T01:02:24.537240Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'I-art': 0, 'I-nat': 1, 'B-org': 2, 'I-org': 3, 'B-tim': 4, 'B-art': 5, 'I-eve': 6, 'B-geo': 7, 'O': 8, 'B-nat': 9, 'B-eve': 10, 'B-gpe': 11, 'I-gpe': 12, 'I-per': 13, 'B-per': 14, 'I-tim': 15, 'I-geo': 16, 'PAD': 17}\n"
     ]
    }
   ],
   "source": [
    "print(tag2idx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> 添加了 `PAD` 标签"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据管道"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:02:56.858281Z",
     "start_time": "2020-05-28T01:02:55.265236Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.4.0'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
    "from transformers import BertTokenizer, BertConfig\n",
    "\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "torch.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:03:13.514786Z",
     "start_time": "2020-05-28T01:03:13.486862Z"
    }
   },
   "outputs": [],
   "source": [
    "# Hyper-parameters\n",
    "MAX_LEN = 75  # sequences are padded/truncated to this many sub-word tokens\n",
    "bs = 32  # batch size used by both data loaders\n",
    "\n",
    "# Use the GPU when one is available, otherwise fall back to the CPU.\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "n_gpu = torch.cuda.device_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:03:17.288602Z",
     "start_time": "2020-05-28T01:03:17.280145Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "device(type='cuda')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:03:19.229892Z",
     "start_time": "2020-05-28T01:03:19.221405Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_gpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:03:22.203181Z",
     "start_time": "2020-05-28T01:03:22.196754Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'GeForce RTX 2080 Ti'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Name of the first GPU; guarded so the notebook also runs on CPU-only\n",
    "# machines, matching the CPU fallback used when `device` is chosen.\n",
    "torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"cpu\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:03:42.998724Z",
     "start_time": "2020-05-28T01:03:42.978398Z"
    }
   },
   "outputs": [],
   "source": [
    "# Tokenizer\n",
    "# `model_path` points at a local directory that appears to hold the cased\n",
    "# BERT checkpoint (see the path name), hence lower-casing is disabled.\n",
    "\n",
    "model_path = \"../../H/models/huggingface/torch/bert-base-cased/\"\n",
    "tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:05:41.153408Z",
     "start_time": "2020-05-28T01:05:41.147402Z"
    }
   },
   "outputs": [],
   "source": [
    "# A word may be split into several sub-word pieces.\n",
    "\n",
    "def tokenize_and_preserve_labels(sentence, text_labels):\n",
    "    \"\"\"Tokenize a sentence word by word, keeping labels aligned to sub-words.\n",
    "\n",
    "    Uses the notebook-level `tokenizer`.\n",
    "\n",
    "    Args:\n",
    "        sentence: iterable of word strings.\n",
    "        text_labels: iterable of labels, one per word.\n",
    "\n",
    "    Returns:\n",
    "        (tokenized_sentence, labels): two flat lists of equal length. Every\n",
    "        sub-word piece carries the label of the word it came from, so a\n",
    "        label such as \"B-per\" is repeated on each piece of a split word.\n",
    "    \"\"\"\n",
    "    tokenized_sentence = []\n",
    "    labels = []\n",
    "\n",
    "    for word, label in zip(sentence, text_labels):\n",
    "        # Split the word into its sub-word pieces.\n",
    "        pieces = tokenizer.tokenize(word)\n",
    "        tokenized_sentence += pieces\n",
    "\n",
    "        # One copy of the word's label per piece.\n",
    "        labels += [label for _ in pieces]\n",
    "\n",
    "    return tokenized_sentence, labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:06:08.022495Z",
     "start_time": "2020-05-28T01:05:46.163254Z"
    }
   },
   "outputs": [],
   "source": [
    "# Tokenize every sentence together with its word-level labels.\n",
    "# NOTE: a later cell rebinds `labels` to sub-word-level labels; re-run the\n",
    "# cell that builds the word-level `labels` before re-running this one.\n",
    "tokenized_texts_and_labels = list(\n",
    "    map(tokenize_and_preserve_labels, sentences, labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:06:24.268866Z",
     "start_time": "2020-05-28T01:06:24.255978Z"
    }
   },
   "outputs": [],
   "source": [
    "# Split the (tokens, labels) pairs into two parallel lists.\n",
    "# NOTE: this rebinds `labels` from word-level to sub-word-level labels.\n",
    "tokenized_texts = [tokens for tokens, _ in tokenized_texts_and_labels]\n",
    "labels = [piece_labels for _, piece_labels in tokenized_texts_and_labels]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:06:39.416849Z",
     "start_time": "2020-05-28T01:06:39.411686Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['In', 'Beirut', ',', 'a', 'string', 'of', 'officials', 'voiced', 'their', 'anger', ',', 'while', 'at', 'the', 'United', 'Nations', 'summit', 'in', 'New', 'York', ',', 'Prime', 'Minister', 'F', '##ou', '##ad', 'Sin', '##ior', '##a', 'said', 'the', 'Lebanese', 'people', 'are', 're', '##sol', '##ute', 'in', 'preventing', 'such', 'attempts', 'from', 'destroying', 'their', 'spirit', '.']\n"
     ]
    }
   ],
   "source": [
    "print(tokenized_texts[10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:06:49.753653Z",
     "start_time": "2020-05-28T01:06:49.751598Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-org', 'I-org', 'O', 'O', 'B-geo', 'I-geo', 'O', 'B-per', 'O', 'B-per', 'B-per', 'B-per', 'I-per', 'I-per', 'I-per', 'O', 'O', 'B-gpe', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n"
     ]
    }
   ],
   "source": [
    "print(labels[10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:08:00.415921Z",
     "start_time": "2020-05-28T01:07:59.794278Z"
    }
   },
   "outputs": [],
   "source": [
    "# Convert the tokens to vocabulary ids and pad/truncate to a fixed length.\n",
    "# NOTE(review): no [CLS]/[SEP] special tokens are added to the sequences\n",
    "# here; confirm this is intended.\n",
    "\n",
    "input_ids = pad_sequences(\n",
    "    [tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],\n",
    "    maxlen=MAX_LEN,\n",
    "    dtype=\"long\",  # np.int64; meets the requirements of the torch modules\n",
    "    value=0.0,\n",
    "    truncating=\"post\",\n",
    "    padding=\"post\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:10:10.776545Z",
     "start_time": "2020-05-28T01:10:10.557937Z"
    }
   },
   "outputs": [],
   "source": [
    "# Convert the labels to indices and pad/truncate to a fixed length; padded\n",
    "# positions receive the index of the \"PAD\" label.\n",
    "\n",
    "tags = pad_sequences(\n",
    "    [[tag2idx.get(label) for label in sent_labels] for sent_labels in labels],\n",
    "    maxlen=MAX_LEN,\n",
    "    dtype=\"long\",\n",
    "    padding=\"post\",\n",
    "    truncating=\"post\",\n",
    "    value=tag2idx[\"PAD\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:13:36.702294Z",
     "start_time": "2020-05-28T01:13:32.589062Z"
    }
   },
   "outputs": [],
   "source": [
    "# Attention mask: 1.0 for real tokens, 0.0 for padded positions (the\n",
    "# padding id written into `input_ids` is 0).\n",
    "# A vectorised comparison replaces the Python loop over every token id;\n",
    "# float32 matches the dtype torch infers from a list of Python floats.\n",
    "\n",
    "attention_masks = (input_ids != 0).astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:13:38.250534Z",
     "start_time": "2020-05-28T01:13:38.217878Z"
    }
   },
   "outputs": [],
   "source": [
    "# Split into a training set (90%) and a validation set (10%).\n",
    "# All three arrays are split in a single call, which guarantees that the\n",
    "# inputs, tags and masks of one sentence always land in the same subset.\n",
    "\n",
    "(tr_inputs, val_inputs,\n",
    " tr_tags, val_tags,\n",
    " tr_masks, val_masks) = train_test_split(\n",
    "    input_ids,\n",
    "    tags,\n",
    "    attention_masks,\n",
    "    random_state=2018,\n",
    "    test_size=0.1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:14:03.360753Z",
     "start_time": "2020-05-28T01:14:03.213000Z"
    }
   },
   "outputs": [],
   "source": [
    "# Convert every split to a torch tensor.\n",
    "\n",
    "tr_inputs, val_inputs, tr_tags, val_tags, tr_masks, val_masks = (\n",
    "    torch.tensor(split)\n",
    "    for split in (tr_inputs, val_inputs, tr_tags, val_tags, tr_masks, val_masks)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:14:19.191645Z",
     "start_time": "2020-05-28T01:14:19.186166Z"
    }
   },
   "outputs": [],
   "source": [
    "# Build the batched datasets. Each item is an (input ids, mask, tags) triple;\n",
    "# the training loader uses a RandomSampler, the validation loader a\n",
    "# SequentialSampler.\n",
    "\n",
    "train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)\n",
    "train_sampler = RandomSampler(train_data)\n",
    "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=bs)\n",
    "\n",
    "valid_data = TensorDataset(val_inputs, val_masks, val_tags)\n",
    "valid_sampler = SequentialSampler(valid_data)\n",
    "valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=bs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 创建模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:15:19.395637Z",
     "start_time": "2020-05-28T01:15:19.389554Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.10.0'"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import transformers\n",
    "from transformers import BertForTokenClassification, AdamW\n",
    "\n",
    "transformers.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:15:24.668050Z",
     "start_time": "2020-05-28T01:15:22.769205Z"
    }
   },
   "outputs": [],
   "source": [
    "# BERT with a token-classification head\n",
    "model = BertForTokenClassification.from_pretrained(\n",
    "    model_path,\n",
    "    num_labels=len(tag2idx),  # number of classes, including PAD\n",
    "    output_attentions=False,\n",
    "    output_hidden_states=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:15:26.875246Z",
     "start_time": "2020-05-28T01:15:25.671419Z"
    }
   },
   "outputs": [],
   "source": [
    "# Move the model to the selected device (GPU when available, else CPU)\n",
    "# instead of hard-coding CUDA.\n",
    "model.to(device);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:19:08.360515Z",
     "start_time": "2020-05-28T01:19:08.353163Z"
    }
   },
   "outputs": [],
   "source": [
    "# Option 1: fine-tune the whole model (BERT encoder + classification head).\n",
    "\n",
    "FULL_FINETUNING = True\n",
    "if FULL_FINETUNING:\n",
    "    param_optimizer = list(model.named_parameters())\n",
    "    # Biases and LayerNorm weights are excluded from weight decay. The\n",
    "    # PyTorch BERT parameters are named `LayerNorm.weight` / `LayerNorm.bias`;\n",
    "    # 'gamma' / 'beta' are kept for checkpoints that use the older naming.\n",
    "    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']\n",
    "    optimizer_grouped_parameters = [{\n",
    "        'params':\n",
    "        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],\n",
    "        # AdamW reads the per-group key `weight_decay`; an unknown key such\n",
    "        # as `weight_decay_rate` is ignored and leaves the decay at 0.\n",
    "        'weight_decay':\n",
    "        0.01\n",
    "    }, {\n",
    "        'params':\n",
    "        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],\n",
    "        'weight_decay':\n",
    "        0.0\n",
    "    }]\n",
    "\n",
    "# Option 2: train only the classification head on top.\n",
    "else:\n",
    "    param_optimizer = list(model.classifier.named_parameters())\n",
    "    optimizer_grouped_parameters = [{\n",
    "        \"params\": [p for n, p in param_optimizer]\n",
    "    }]\n",
    "\n",
    "# Optimizer\n",
    "optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5, eps=1e-8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:20:14.609431Z",
     "start_time": "2020-05-28T01:20:14.604163Z"
    }
   },
   "outputs": [],
   "source": [
    "from transformers import get_linear_schedule_with_warmup\n",
    "\n",
    "epochs = 3\n",
    "max_grad_norm = 1.0  # gradient-norm clipping threshold used in the training loop\n",
    "\n",
    "# Total number of optimizer steps: batches per epoch times epochs\n",
    "total_steps = len(train_dataloader) * epochs\n",
    "\n",
    "# Learning-rate schedule (no warm-up steps)\n",
    "scheduler = get_linear_schedule_with_warmup(\n",
    "    optimizer,\n",
    "    num_warmup_steps=0,\n",
    "    num_training_steps=total_steps,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T01:32:34.636569Z",
     "start_time": "2020-05-28T01:32:34.631582Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import f1_score, accuracy_score, classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:06:05.678715Z",
     "start_time": "2020-05-28T01:55:12.336488Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "Epoch:   0%|          | 0/3 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average train loss: 0.06987917738890012 at epoch 0\n",
      "Validation loss: 0.13099516734480857 at epoch 0\n",
      "Validation Accuracy: 0.961826026810711at epoch 0\n",
      "Validation F1-Score: 0.6354711494451535at epoch 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "Epoch:  33%|███▎      | 1/3 [03:37<07:15, 217.62s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "       B-art       0.18      0.32      0.23        53\n",
      "       B-eve       0.42      0.38      0.40        39\n",
      "       B-geo       0.91      0.86      0.88      6325\n",
      "       B-gpe       0.93      0.96      0.94      1727\n",
      "       B-nat       0.38      0.41      0.39        34\n",
      "       B-org       0.73      0.82      0.77      3265\n",
      "       B-per       0.85      0.85      0.85      2813\n",
      "       B-tim       0.86      0.90      0.88      2254\n",
      "       I-art       0.11      0.26      0.15        19\n",
      "       I-eve       0.15      0.33      0.20        18\n",
      "       I-geo       0.81      0.77      0.79       898\n",
      "       I-gpe       0.58      0.75      0.65        20\n",
      "       I-nat       0.17      0.33      0.22         3\n",
      "       I-org       0.75      0.81      0.78      2152\n",
      "       I-per       0.94      0.85      0.89      3987\n",
      "       I-tim       0.76      0.79      0.77       767\n",
      "           O       0.99      0.99      0.99     97070\n",
      "\n",
      "    accuracy                           0.96    121444\n",
      "   macro avg       0.62      0.67      0.64    121444\n",
      "weighted avg       0.96      0.96      0.96    121444\n",
      "\n",
      "\n",
      "Average train loss: 0.06977526041050149 at epoch 1\n",
      "Validation loss: 0.13099516734480857 at epoch 1\n",
      "Validation Accuracy: 0.961826026810711at epoch 1\n",
      "Validation F1-Score: 0.6354711494451535at epoch 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "Epoch:  67%|██████▋   | 2/3 [07:15<03:37, 217.68s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "       B-art       0.18      0.32      0.23        53\n",
      "       B-eve       0.42      0.38      0.40        39\n",
      "       B-geo       0.91      0.86      0.88      6325\n",
      "       B-gpe       0.93      0.96      0.94      1727\n",
      "       B-nat       0.38      0.41      0.39        34\n",
      "       B-org       0.73      0.82      0.77      3265\n",
      "       B-per       0.85      0.85      0.85      2813\n",
      "       B-tim       0.86      0.90      0.88      2254\n",
      "       I-art       0.11      0.26      0.15        19\n",
      "       I-eve       0.15      0.33      0.20        18\n",
      "       I-geo       0.81      0.77      0.79       898\n",
      "       I-gpe       0.58      0.75      0.65        20\n",
      "       I-nat       0.17      0.33      0.22         3\n",
      "       I-org       0.75      0.81      0.78      2152\n",
      "       I-per       0.94      0.85      0.89      3987\n",
      "       I-tim       0.76      0.79      0.77       767\n",
      "           O       0.99      0.99      0.99     97070\n",
      "\n",
      "    accuracy                           0.96    121444\n",
      "   macro avg       0.62      0.67      0.64    121444\n",
      "weighted avg       0.96      0.96      0.96    121444\n",
      "\n",
      "\n",
      "Average train loss: 0.06955887069766878 at epoch 2\n",
      "Validation loss: 0.13099516734480857 at epoch 2\n",
      "Validation Accuracy: 0.961826026810711at epoch 2\n",
      "Validation F1-Score: 0.6354711494451535at epoch 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch: 100%|██████████| 3/3 [10:53<00:00, 217.78s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "       B-art       0.18      0.32      0.23        53\n",
      "       B-eve       0.42      0.38      0.40        39\n",
      "       B-geo       0.91      0.86      0.88      6325\n",
      "       B-gpe       0.93      0.96      0.94      1727\n",
      "       B-nat       0.38      0.41      0.39        34\n",
      "       B-org       0.73      0.82      0.77      3265\n",
      "       B-per       0.85      0.85      0.85      2813\n",
      "       B-tim       0.86      0.90      0.88      2254\n",
      "       I-art       0.11      0.26      0.15        19\n",
      "       I-eve       0.15      0.33      0.20        18\n",
      "       I-geo       0.81      0.77      0.79       898\n",
      "       I-gpe       0.58      0.75      0.65        20\n",
      "       I-nat       0.17      0.33      0.22         3\n",
      "       I-org       0.75      0.81      0.78      2152\n",
      "       I-per       0.94      0.85      0.89      3987\n",
      "       I-tim       0.76      0.79      0.77       767\n",
      "           O       0.99      0.99      0.99     97070\n",
      "\n",
      "    accuracy                           0.96    121444\n",
      "   macro avg       0.62      0.67      0.64    121444\n",
      "weighted avg       0.96      0.96      0.96    121444\n",
      "\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Average training loss and validation loss, recorded after every epoch.\n",
    "# NOTE: the scheduler decays the learning rate linearly to 0 over `total_steps`\n",
    "# optimizer steps. Re-running this cell without first re-creating `optimizer`\n",
    "# and `scheduler` therefore continues with a learning rate of ~0 and the\n",
    "# model no longer changes.\n",
    "loss_values, validation_loss_values = [], []\n",
    "\n",
    "for epoch in trange(epochs, desc=\"Epoch\"):\n",
    "    # ========================================\n",
    "    #               Training\n",
    "    # ========================================\n",
    "\n",
    "    # Switch the model to training mode.\n",
    "    model.train()\n",
    "\n",
    "    # Running sum of the batch losses of this epoch.\n",
    "    total_loss = 0\n",
    "\n",
    "    # Training loop\n",
    "    for step, batch in enumerate(train_dataloader):\n",
    "        # Move the batch to the selected device.\n",
    "        batch = tuple(t.to(device) for t in batch)\n",
    "        b_input_ids, b_input_mask, b_labels = batch\n",
    "\n",
    "        # Clear the gradients left over from the previous step.\n",
    "        model.zero_grad()\n",
    "\n",
    "        # Forward pass; labels are passed, and the first output is the loss.\n",
    "        outputs = model(b_input_ids,\n",
    "                        token_type_ids=None,\n",
    "                        attention_mask=b_input_mask,\n",
    "                        labels=b_labels)\n",
    "        loss = outputs[0]\n",
    "\n",
    "        # Backward pass\n",
    "        loss.backward()\n",
    "\n",
    "        # Accumulate the loss.\n",
    "        total_loss += loss.item()\n",
    "\n",
    "        # Clip the gradient norm to guard against exploding gradients.\n",
    "        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(),\n",
    "                                       max_norm=max_grad_norm)\n",
    "        # Update the parameters.\n",
    "        optimizer.step()\n",
    "        # Update the learning rate.\n",
    "        scheduler.step()\n",
    "\n",
    "    # Average training loss over the batches of this epoch.\n",
    "    avg_train_loss = total_loss / len(train_dataloader)\n",
    "    print(\"Average train loss: {} at epoch {}\".format(avg_train_loss, epoch))\n",
    "\n",
    "    loss_values.append(avg_train_loss)\n",
    "\n",
    "    # ========================================\n",
    "    #               Validation\n",
    "    # ========================================\n",
    "\n",
    "    # Switch the model to evaluation mode.\n",
    "    model.eval()\n",
    "\n",
    "    # Running sum of the validation batch losses.\n",
    "    eval_loss = 0\n",
    "\n",
    "    predictions, true_labels = [], []\n",
    "    for batch in valid_dataloader:\n",
    "        batch = tuple(t.to(device) for t in batch)\n",
    "        b_input_ids, b_input_mask, b_labels = batch\n",
    "\n",
    "        # No gradients are needed during validation.\n",
    "        with torch.no_grad():\n",
    "\n",
    "            # Labels are passed, so the outputs are (loss, logits, ...).\n",
    "            outputs = model(b_input_ids,\n",
    "                            token_type_ids=None,\n",
    "                            attention_mask=b_input_mask,\n",
    "                            labels=b_labels)\n",
    "\n",
    "        # Move the results to the CPU.\n",
    "        logits = outputs[1].detach().cpu().numpy()\n",
    "        label_ids = b_labels.to('cpu').numpy()\n",
    "\n",
    "        # Accumulate the loss and collect predictions / gold labels.\n",
    "        eval_loss += outputs[0].mean().item()\n",
    "        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])\n",
    "        true_labels.extend(label_ids)\n",
    "\n",
    "    eval_loss = eval_loss / len(valid_dataloader)\n",
    "    validation_loss_values.append(eval_loss)\n",
    "    print(\"Validation loss: {} at epoch {}\".format(eval_loss, epoch))\n",
    "\n",
    "    # Metrics are computed over the positions whose gold label is not PAD.\n",
    "    pred_tags = [\n",
    "        tag_values[p_i] for p, l in zip(predictions, true_labels)\n",
    "        for p_i, l_i in zip(p, l) if tag_values[l_i] != \"PAD\"\n",
    "    ]\n",
    "    valid_tags = [\n",
    "        tag_values[l_i] for l in true_labels for l_i in l\n",
    "        if tag_values[l_i] != \"PAD\"\n",
    "    ]\n",
    "    # scikit-learn metrics take (y_true, y_pred) in that order; swapping them\n",
    "    # exchanges precision and recall and reports the support of the\n",
    "    # predictions instead of the gold labels.\n",
    "    print(\"Validation Accuracy: {} at epoch {}\".format(\n",
    "        accuracy_score(valid_tags, pred_tags), epoch))\n",
    "    print(\"Validation F1-Score: {} at epoch {}\".format(\n",
    "        f1_score(valid_tags, pred_tags, average='macro'), epoch))\n",
    "    valid_report = classification_report(valid_tags, pred_tags)\n",
    "    print(valid_report)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> 少数标签（如 B-art、B-nat、I-art 等）的识别正确率非常低，这些标签对应的样本量非常少"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:10:42.448303Z",
     "start_time": "2020-05-28T02:10:42.163352Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvAAAAGXCAYAAADVv2QFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdeVxVdf7H8fcFxBVldUlM0auYiXuhZmESLaYllOaSZZlpSZktNo22WFoyzehMEeqkaW6NJYlbmuvPLMZIbdF0FLRyKY3FyxVKAe/9/eFwpxugHAQuZ+b1fDzmMfI953u+3/ORufO+x+85x+J0Op0CAAAAYApenp4AAAAAgPIjwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeADAJR0/flzh4eF68803PT0VAPifR4AHgCry+eefKzw8XPPnz/f0VAAA/0V8PD0BAEDN17x5c33zzTfy9vb29FQA4H8eV+AB4H9MXl6e4T4Wi0W1a9eWj8//xnWf8+fP69dff/X0NACgVAR4AKgBCgoKNGfOHN1+++2KiIhQjx49NG7cOO3fv99tP4fDodmzZ2vEiBG67rrr1LFjR/Xt21cvvviiTp8+7bbvb9etf/TRR4qLi1OnTp00bdo0SdIf/vAHhYeH68yZM3rxxRfVq1cvRUREaOjQofr666/LPFZpbdu2bdNdd92liIgI9enTRwkJCSoqKipxnh9//LHuuOMORUREqG/fvkpMTFRqaqrCw8P14YcflqtWmZmZmjZtmqKjo9WxY0f16tVLDzzwgD777DPXPv369dPIkSNL9C1e1vTbsT788EOFh4crNTVVb731lm666SZ16tRJ69ev1+DBg9W7d+9Sz2XHjh0KDw/XwoULXW1Op1PLli1TXFycOnfurK5du2rkyJHauXNnuc4NAMrjf+NSCgDUYIWFhRo9erS+/PJL3XnnnRoxYoTy8vL0/vvva9iwYVqyZIkiIiJc+86fP18333yzoqOjVbduXe3du1fJycnas2ePkpOT5evr63b8zZs3a/HixRo2bJiGDh2qBg0auG0fPXq0AgMDNX78eNlsNi1YsEAPP/ywtmzZUmLf0mzfvl3Lli3T0KFDddddd2nLli1655131KhRI40bN86130cffaQnn3xSV155peLj4+Xt7a2UlBRt3bq13LU6fvy4hg0bpuzsbN15553q2LGjfv31V3399ddKTU3VddddV+5j/V7xl44hQ4aofv36CgsL06BBg/Tyyy9rx44duvHGG932T0lJkY+PjwYOHOhqe+aZZ7Ru3TrdcsstiouLU0FBgdasWaMHH3xQb775pqKjoys8PwAoRoAHAA9bunSp0tLSNG/ePF1//fWu9uHDh2vAgAH605/+pMWLF0uSfH199emnn6pOnTqu/YYNG6auXbtqypQp2rx5s/r37+92/IyMDK1evVpt2rQpdfwOHTropZdecv3cpk0bPfHEE1q7dq2GDh16yflnZGRo7dq1Cg0Ndc1n4MCBWrJkiSvAFxUV6bXXXlNgYKA++OADNWrUyLXvHXfcUY4qXTB16lT9/PPPJWolXfjXictx9uxZpaSkqG7duq62sLAwvfbaa0pJSXEL8Hl5edqyZYuuv/56BQUFSZI2bdqkNWvW6OWXX9Y999zj2ve+++7TkCFDNH36dPXr108Wi+Wy5gkALKEBAA9bvXq1Wrdurauvvlo5OTmu/xQUFKh3797avXu3zp49K+nCWvTi8H7+/HnZ7Xbl5OSoZ8+ekqRvvvmmxPGjoqLKDO+SNGrUKLefi4/1ww8/lGv+0dHRrvBePMfIyEhlZmYqPz9fkvTtt9/q559/VmxsrCu8S1L9+vXL9SVBkmw2m3bs2KHrr7++RHiXJC+vy/u/tGHDhrmFd0ny9/dXv379tHXrVtntdlf7xx9/rF9//VWxsbGuttWrV6t+/fq6
6aab3P4e7Xa7+vXrpxMnTuj777+/rDkCgMQVeADwuMOHD+vs2bPq1atXmfucPn1azZo1k3RhKcqCBQt04MABFRYWuu2Xm5tbom+rVq0uOn6LFi3cfg4ICJB0ITCXx+/7SxeCb/Ex6tevr+PHj0u6cEX790prK83Ro0fldDrVoUOHcu1vVFnzuPPOO/Xxxx9r/fr1rivrKSkpatSokdtV+cOHDys/P1+9e/cuc4zs7Oxyny8AlIUADwAe5nQ61a5dOz333HNl7hMYGChJ2rhxoyZOnKhOnTrpj3/8o5o1a6batWvr/Pnzeuihh+R0Okv0/f1V5d8r69GQpR3LSP/fHqO8x7qY4mNczhKU8+fPl7ntt8uSfisqKkqBgYFKSUnRPffcox9//FFffPGFhg4d6na/gdPpVGBgoP7yl7+UOUbbtm0rPHcAKEaABwAPa9mypU6fPq2ePXtechnIqlWrVLt2bS1atMgtmB8+fLiqp3lZipfYfPfddyW2ldZWmpYtW8pisZR4Mk9p/P39S/0XhGPHjpVrrN/y8fHRgAEDtGjRIh07dkxr166V0+l0Wz5TPL/vv/9enTt3Vv369Q2PAwDlxRp4APCwQYMGKTMzUwsWLCh1e1ZWluvP3t7eslgsbjdsOp1OzZ49u8rneTk6duyokJAQrVy50m2ZT35+vv7xj3+U6xj+/v664YYb9Mknnyg1NbXE9t9e5W/VqpW+++47nTp1ytVWUFCgpUuXVmj+xWE9JSVFq1atUlhYmDp37uy2z6BBg+RwODRz5sxSj/Hbv0cAuBxcgQeAKvbPf/5T586dK9EeEBCgYcOG6b777lNqaqr+9Kc/aefOnerZs6caNGigH3/8UTt37pSvr6/rKTS33HKLPv74Y91///0aNGiQioqKtHnz5hr/0iEfHx89++yzevrppzV48GDdfffd8vb21sqVK+Xv76/jx4+Xa2nM888/r/3792vMmDEaNGiQrr76ap07d05ff/21mjdvrmeeeUaSNGLECK1bt06jRo3S0KFDVVhYqFWrVl1yOVFZOnTooHbt2mnhwoXKy8vTk08+WWKfW2+9VXFxcVqyZIm+/fZb3XjjjQoICNDJkyf11Vdf6YcfftCWLVsqND4A/BYBHgCq2I4dO7Rjx44S7WFhYRo2bJhq1aqluXPnatmyZVq1apXrZUmNGzdWRESE21KN22+/Xfn5+Vq4cKESEhJcN1I+9dRTioyMrLZzqoiBAwfK29tbs2fP1htvvKHg4GDdfffdCg8PV3x8vGrXrn3JY7Ro0ULJycl666239Mknn2jVqlVq2LCh2rdv7/boxu7du2vGjBmaM2eOXn/9dTVu3FjDhg1Tx44dSzx1p7xiY2OVkJAgLy+vMh99+dprrykyMlLvv/++5s6dq8LCQoWEhKhDhw566qmnKjQuAPyexVkZdxYBAFBB77zzjhISErR8+XJ16dLF09MBgBqPNfAAgGpRUFBQ4ikw+fn5Wrp0qfz9/avs8ZAA8N+GJTQAgGpx7NgxjRkzRrfffrtCQ0OVmZmplStX6vjx43rppZfcHskIACgbAR4AUC0CAwPVpUsXrVmzRtnZ2fLx8VG7du301FNPqX///p6eHgCYBmvgAQAAABNhDTwAAABgIgR4AAAAwERYA2/Q6dP5cjiqf9VRUFADZWfnVfu4ZkW9jKFexlAvY6iXMdTLGOplHDUzxhP18vKyKCCgfpnbCfAGORxOjwT44rFRftTLGOplDPUyhnoZQ72MoV7GUTNjalq9WEIDAAAAmAgBHgAAADARAjwAAABgIgR4AAAAwEQI8AAAAICJEOABAAAAEyHAAwAAACZCgAcAAABMhAAPAAAAmAhvYq3h7DtTlfVhsg6dzpFPQKCC4+5Sw569PT0tALgkPr8AmFlN/gwjwNdg9p2pOrVooZwFBZKkopxsnVq0UJJqzC8QAJSGzy8AZlbTP8MsTqfT6anB8/PzNWvWLG3YsEF2u11Wq1Xjx49X
dHT0Rfvt2rVLycnJ2r9/vzIyMlRUVKSDBw+W2O+7775TQkKCDh48qOzsbNWuXVutW7fWiBEjdMcdd1RoztnZeXI4qqdkRyY9paKc7BLtFh8f1WndplrmYFa1fH1UWFDk6WmYBvUyhnpd2tkjh+UsKlkjPr8ujd8vY6iXcdTs0sr6DPMJDFLrP/2lysf38rIoKKhBmds9egU+Pj5e+/fv19NPP63Q0FCtXLlS8fHxmjNnjqKiosrst3PnTqWlpenqq6+Wj4+P9u3bV+p+eXl5Cg4O1q233qqmTZvq7NmzWrNmjZ555hmdPHlSDz/8cFWdWqUoLbxLKvUXCgBqkrI+p/j8AmAGZX1WlZXNqpvHrsBv375dDz/8sBITExUTEyNJcjqdGj58uGw2m9avX19mX4fDIS+vC/ffTp8+XYsWLSr1CnxZ7rnnHmVlZWnLli2G510TrsBX17c/MwsJ8VNm5hlPT8M0qJcx1OvS+PyqOH6/jKFexlGzS/P0Z9ilrsB77Ck0mzZtkp+fn9tyGYvFotjYWB05ckQZGRll9i0O7xXl7+8vH5+av/w/OO4uWXx93dosvr4KjrvLQzMCgPLh8wuAmdX0zzCPpdj09HRZrdYSYTw8PFySdOjQIVmt1koZy+FwyOFwyG63a/369fr000/10ksvVcqxq1LxTRJZHyarqAbeAQ0AZeHzC4CZ1fTPMI8FeJvNplatWpVob9SokWt7ZXn99df1zjvvSJJq1aqlyZMna/DgwZV2/KrUsGdvNezZm3/uAmA6fH4BMLOa/Bnm0XUkFoulQtuMuv/++9W/f3/l5ORo+/bteuWVV/Trr79q9OjRho91sfVIVS0kxM9jY5sR9TKGehlDvYyhXsZQL2Ool3HUzJiaVi+PBXh/f/9Sr7Ln5uZK+s+V+MrQtGlTNW3aVJIUFRUli8WimTNnKjY2VoGBgYaOVZ03sf5WTfz2V5NRL2OolzHUyxjqZQz1MoZ6GUfNjPFEvWrsTaxWq1WHDx+Ww+Fwaz906JAkqV27dlU2dkREhIqKinTs2LEqGwMAAACoCh4L8DExMbLb7dq6datbe0pKisLCwirtBtbSfP755/Ly8lJoaGiVjQEAAABUBY8toYmKilJkZKQmT54sm82m0NBQpaSkaPfu3UpKSnLtN3LkSKWlpbk95z0nJ0dpaWmSpKNHj0qSNmzYIElq3ry5IiIiJEmvvfaazp8/r27duik4OFinT5/Wxo0btXbtWj344IMKCgqqrtMFAAAAKoXHArzFYlFSUpJmzpypWbNmyW63y2q1KjExUf369bto3/T0dE2YMMGtrfjn2NhYzZgxQ5LUuXNnvffee1q3bp3sdrvq1q2r8PBwJSQk6M4776yaEwMAAACqkMfexGpW3MRqDtTLGOplDPUyhnoZQ72MoV7GUTNjuIkVAAAAwGUhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATMTHk4Pn5+dr1qxZ2rBhg+x2u6xWq8aPH6/o6OiL9tu1a5eSk5O1f/9+ZWRkqKioSAcPHiyx3969e7VixQrt3r1bJ06cUL169dS+fXs98sgj6tGjR1WdFgAAAFBlPHoFPj4+XmvWrNGECRM0d+5cWa1WxcfHa/v27Rftt3PnTqWlpally5Zq3759mft99NFH2rdvn+6++27Nnj1bU6dOlcPh0L333quNGzdW9ukAAAAAVc7idDqd
nhh4+/btevjhh5WYmKiYmBhJktPp1PDhw2Wz2bR+/foy+zocDnl5XfjuMX36dC1atKjUK/DZ2dkKCgpyayssLNSAAQNUv359ffjhh4bnnZ2dJ4ej+ksWEuKnzMwz1T6uWVEvY6iXMdTLGOplDPUyhnoZR82M8US9vLwsCgpqUPb2apyLm02bNsnPz89tuYzFYlFsbKyOHDmijIyMMvsWh/dL+X14l6RatWqpffv2OnnypPFJAwAAAB7msQCfnp4uq9VaIoyHh4dLkg4dOlQl4xYUFOjLL79U27Ztq+T4AAAAQFXyWIC32Wxq1KhRifbiNpvNViXjvv766/r55581bty4Kjk+AAAAUJU8+hQai8VSoW0VtXjxYi1atEiPPfaYevXqVaFjXGw9UlULCfHz2NhmRL2MoV7GUC9jqJcx1MsY6mUcNTOmptXLYwHe39+/1Kvsubm5klTq1fnLsXz5ck2fPl2jRo1SfHx8hY/DTazmQL2MoV7GUC9jqJcx1MsY6mUcNTOGm1h/w2q16vDhw3I4HG7txWvf27VrV2ljffDBB3rxxRc1fPhwPffcc5V2XAAAAKC6eSzAx8TEyG63a+vWrW7tKSkpCgsLk9VqrZRxkpOT9fzzz2vw4MF6/vnnK+WYAAAAgKd4bAlNVFSUIiMjNXnyZNlsNoWGhiolJUW7d+9WUlKSa7+RI0cqLS3N7TnvOTk5SktLkyQdPXpUkrRhwwZJUvPmzRURESFJWr9+vaZMmaKOHTsqLi5OX3/9tdscunTpUqXnCAAAAFQ2jwV4i8WipKQkzZw5U7NmzZLdbpfValViYqL69et30b7p6emaMGGCW1vxz7GxsZoxY4akCy+Lcjgc2rt3r4YOHVriOKW9/AkAAACoyTz2Jlaz4iZWc6BexlAvY6iXMdTLGOplDPUyjpoZw02sAAAAAC4LAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCI+np5Afn6+Zs2apQ0bNshut8tqtWr8+PGKjo6+aL9du3YpOTlZ+/fvV0ZGhoqKinTw4MES++Xl5emtt97S/v37tX//ftntdr322muKi4urqlMCAAAAqozHr8DHx8drzZo1mjBhgubOnSur1ar4+Hht3779ov127typtLQ0tWzZUu3bty9zP5vNphUrVsjpdOqGG26o7OkDAAAA1cqjV+C3b9+u1NRUJSYmKiYmRpLUs2dPHTt2TDNmzFBUVFSZfR999FHFx8dLkqZPn659+/aVul/z5s31xRdfSJIOHDigtWvXVvJZAAAAANXHo1fgN23aJD8/P7flMhaLRbGxsTpy5IgyMjLK7OvlVb6pWyyWy54nAAAAUFN4NMCnp6fLarWWCOPh4eGSpEOHDnliWgAAAECN5dEAb7PZ1KhRoxLtxW02m626pwQAAADUaB5/Cs3FlrjUxOUvQUENPDZ2SIifx8Y2I+plDPUyhnoZQ72MoV7GUC/jqJkxNa1eHg3w/v7+pV5lz83NlaRSr857WnZ2nhwOZ7WPGxLip8zMM9U+rllRL2OolzHUyxjqZQz1MoZ6GUfNjPFEvby8LBe9aOzRJTRWq1WHDx+Ww+Fway9e+96uXTtPTAsAAACosTwa4GNiYmS327V161a39pSUFIWFhclqtXpoZgAAAEDN5NElNFFRUYqMjNTkyZNls9kUGhqqlJQU7d69W0lJSa79Ro4cqbS0NLc3rebk
5CgtLU2SdPToUUnShg0bJF149ntERIRr3+3bt+vXX3/ViRMnJEn79u1TvXr1JEm33npr1Z4kAAAAUIk8GuAtFouSkpI0c+ZMzZo1S3a7XVarVYmJierXr99F+6anp2vChAlubcU/x8bGasaMGa72qVOnusK7JC1dulRLly6VJLcvBQAAAEBNZ3E6ndV/R6aJcROrOVAvY6iXMdTLGOplDPUyhnoZR82M4SZWAAAAAJeFAA8AAACYCAEeAAAAMBECPAAAAGAiBHgAAADARAjwAAAAgIkQ4AEAAAATIcADAAAAJkKABwAAAEyEAA8AAACYCAEeAAAAMBECPAAAAGAiBHgAAADARAjwAAAAgIn4eHoCAAAARhUVFSo/367s7B9VWFjo6emYys8/e8nhcHh6GqZR2fXy9q6lBg0aqW7d+hU+BgEeAACYSlFRoXJyTqlePT81auQvp9NLFovF09MyDR8fLxUVEeDLqzLr5XQ6VVh4TjZblnx8aqlWLd8KHYclNAAAwFTy8+2qV89PDRo0ko9PLcI7TMNiscjXt47q12+kvDxbhY9DgAcAAKZy7tyvqlOn4ssPAE+rU6euCgsLKtyfAA8AAEzF4Tgvb29vT08DqDAvL285HOcr3r8S5wIAAFAtWDYDM7vc318CPAAAAGAiBHgAAIAaYN++vZo/f67OnDlTJcfv06eH5s+fW+19L8eePbvUp08P7dmzq9rHrsl4jCQAAEANsH//Xi1Y8Lb69x8oPz+/Sj/+nDkL1Lhx42rvi8pHgAcAADAZh8Mhh8MhH5/yR7mOHSMqPN7l9EXlMxzgf/jhB/3www+64YYbXG1ff/21Zs+eLZvNptjYWN1zzz2VOkkAAICq9s9vT+rD7YeVbT+noIa1FRfVRr2ublotY8+fP1cLFrwtSRo8+A5X+wcfrFazZleoT58eGjx4mJo1u0LJyct18uRPmjXrLXXrdmFpyz//+ZmOHz8mp9OhK69sqaFDRyo6OsZtjD59euiBB8Zo7NhH3MZcsuQDvfPO37VzZ6pq166tXr2u0+OPP6UGDRqU6Dt69FjDfc+cOaPExFn65JP/U1FRoTp16qonnnhaw4bFuR3TiJSUFUpOfl/Hjx9TvXr11KNHpMaNi1ezZle49jl48F+aN2+2DhzYr/z8PPn7B6h9+6v03HMvqmHDhpKklStXKCVlhU6cOC4vL281btxYt9xyu0aOHGV4TtXJcID/85//LJvN5grwOTk5GjNmjH755RfVrl1bL730koKCgnTTTTdV+mQBAACqwj+/Pal31/9LBf9+42a2/ZzeXf8vSaqWED9w4CDl5+fp/fff0/TprysoKFiSXP8tSdu2bVZISIjGjYtX3br11Lx5qCTp1KmTiosbrMaNm+j8+fPas2eXXn55in79NV8DBgy65NiTJz+jfv1iNHDgIB0+nK6//z1JkvTHP7542X0dDocmTXpChw79Sw8++LDatQvXvn179cwzE4wV6DeKvzz07z9Q48c/oaysn/X223M0btyDWrhwmQICAvXLL79o4sTxatcuXJMmTZafn5+ysjL1xRefq6DgwvPXN23aoL/+9XWNHPmAunTpJofDoWPHflBWVlaF51ZdDAf4ffv2aciQIa6f161bp7y8PKWkpKhVq1a677779O677xLgAQBAtfps70/69JufKtT38I+5KjrvdGsrKHJowUcH9MlXPxo6Vp9OzXRdRDNDfRo3bqKmTS/0adcu3O1Ksms+BQX661+TVL9+A7f23wZth8Oh7t2vUW6uTcnJ75crwN9xR6zuuWeEJOmaayJ14sQJrVu3Ws8998IlH3d4qb47d6Zq796v9eyzUzRw4KB/79dTPj61NHdu4iXn9nt2u11Lly5S37793M47PPwqPfjgvVq+fJnGjYvX0aPfy27P1fjxT6ht23au/WJibnX9ee/er9W6dRs99NA4V9u11/Y0PCdPMPwUmpycHLebGHbs2KFu3bqpXbt28vX1Vf/+
/XX48OFKnSQAAEBV+n14v1S7J3Tvfk2J8C5deFLLk0/Ga+DAmxUVFam+fXtq7dpV+uGHH8p13D59otx+btPGqoKCc8rJyb7svl99tVuS1K+f+4XdmJhbyjW33/v2229UUHBON9/c3629bdtwtW5tdT2tJjT0Svn5NVRCwjStX79WP/54osSxOnToqIyMdL3++qtKS9upvLy8Cs3JEwxfga9bt67r8Ubnz5/X7t27NXLkSNf2OnXqmKoAAADgv8N1EcavfBd7JukzZdvPlWgPalhbz47odrlTqxS/XU5TbN++vZo4cby6deuhJ5+cpJCQxvLx8dHKlSu0bt3qch23YcNGbj/7+vpKkmupyeX0tdvt8vWtXeKLR0BAYLnm9nt2u12SFBgYVGJbUFCwfvzxuCSpQYMGSkz8uxYunKe//e0vyss7o2bNmisubrCGDh0hi8WiW2+9XUVFhVqzZpXWrl0lSercuavGjo3X1Vd3rND8qovhAN+2bVutWrVKd955pzZs2KBffvlF1113nWv7iRMnFBhYsb8UAAAAT4iLauO2Bl6SfH28FBfVxoOzclfacpatWzfK29tHCQmzXOFZkoqKCqtzamVq2LCRCgrOKT8/zy3Enz6dU+HjSSr1Xweys7PcvlC0aWPVK6/MkNPpVEZGulatStZbb/1Vfn5+GjDgTknSgAGDNGDAIJ09e1Z79uzS3Llv6cknx+uDD9a4bnStiQwvoRk9erQOHjyo3r176+WXX9ZVV12lHj16uLZ/9tln6tChQ6VOEgAAoCr1urqp7r+tvYIa1pZ04cr7/be1r7an0EhSrVoXAvi5cyX/JaAsFotF3t7e8vL6T6Q7fTpHO3Z8Uunzq4iuXS/868XWrZvd2jdt+rhCx+vYsZN8fWtr48aP3NozMtJ15EiGune/pkQfi8Witm3baeLESfL29lZGRnqJferUqaPevfto6NARys/P18mTxu57qG6Gr8D37dtX7777rrZs2aIGDRro3nvvdX0jPH36tJo2bapBgy59wwQAAEBN0uvqptUa2H+vdesLV/uTk9/XLbfcJh8fH7Vp01a1atUqs0+vXn20fPkyTZ06RXfcEaucnGwtXDhPgYGB+uWX/OqaepkiI3srIqKz/vSFB5YAACAASURBVPrX13XmjF1t24br22/3asOGdZLk9sWjPPz8/HTffQ9o3rw5evXVqerXL0ZZWZmaN2+OgoNDNGTIcEnSZ5/tUErKCl1/fV81a3aFHA6HNm5cL4fDocjIXpKkhIRpql27jiIiOisoKEg///yzFi9eoCZNmqpVq9aVW4hKVqEXOV1zzTW65pqS33ACAgKUmGj8jmIAAID/dZ07d9W9947S+vVrtGpVshwOh+s58GXp0eNaTZo0WcuWLdKzz05UkyZNNWTIcOXkZLueK+9JXl5eSkiYpcTEWVq0aIGKigoVEdFZzz//isaOHVXqTbmXMmrUQ/L3D1By8nJt2rRBdevW0zXXROqRRx5XQECAJKlFixaqV6++lixZqKysLPn6+iosLEyvvDJDvXpdWPrdqVMXrV+/Vlu2bFRe3hn5+weoW7fuevDBsW7LkWoii9PpvOzbq4uKirRlyxbl5ubqxhtvVEhISGXMrUbKzs6Tw1H9d6SHhPgpM/NMtY9rVtTLGOplDPUyhnoZQ70u7eTJH9S0aUtJko+Pl4p+s24dl1YTarZx4wa9/PIUJSXNU6dOXTw6l0upqnr99vf497y8LAoKKvvLjeEr8H/605/0+eefKzk5WZLkdDr1wAMPaNeuXXI6nfL399f777+vK6+80uihAQAA8F9m48b1On06x7VE6Ntv92nZssXq3LlrjQ/vNZXhAL9jxw717t3b9fPWrVv1xRdf6KGHHtJVV12lV155RX//+981bdq0Sp0oAAAAzKdu3br6xz/W68SJYzp79qyCg0PUv/9AjRkz7tKdUSrDAf7kyZNq2fI/l/u3bdum0NBQPf3005Kk9PR0rVmz
plzHys/P16xZs7RhwwbZ7XZZrVaNHz9e0dHRF+23a9cuJScna//+/crIyFBRUZEOHjxY6r6FhYWaPXu2Vq5cqczMTLVs2VKjRo3S4MGDy3nGAAAAqKjrr++r66/v6+lp/Fcx/BjJwsJCeXt7u37+/PPP3a7It2jRQpmZmeU6Vnx8vNasWaMJEyZo7ty5slqtio+P1/bt2y/ab+fOnUpLS1PLli3Vvn37i+770ksvaf78+br//vs1f/583XDDDZoyZYree++9cs0RAAAAqEkMB/imTZvqq6++knThavuxY8fcnkiTnZ2tevXqXfI427dvV2pqqqZNm6bBgwerV69eSkhIUJcuXTRjxoyL9n300Ue1ZcsWvfHGG+rWrey3o6Wnp2vFihWaOHGiRo0apcjISD377LMaMGCAZs2aZeg5qwAAAEBNYDjA33777UpJSdHYsWM1duxYNWjQQFFRUa7tBw4cKNcNrJs2bZKfn5/bchmLxaLY2FgdOXJEGRkZZU+6nM8M3bx5sywWi+644w639ri4OOXm5mrnzp3lOg4AAABQUxgO8GPHjlVsbKy++uorWSwWJSQkuF41e+bMGW3dulW9evW65HHS09NltVpLhPHw8HBJ0qFDh4xOrdQxgoODFRgYWGVjAAAAANXJ8E2svr6+evXVV0vdVr9+fX366aeqU6fOJY9js9nUqlWrEu2NGjVybb9cNptN/v7+VToGAAAAUJ0q9CbWsnh5ecnPz6/c+1sslgptM6K04xS3VWSMiz1Uv6qFhJS/tqBeRlEvY6iXMdTLGOp1cT//7CUfn//8C/5v/4zyoWbGVEW9vLy8Kvy/9QoF+F9++UXz5s3Tpk2bdPz4cUlSaGiobr75Zo0ePbpcN7H6+/uXegU8NzdX0n+ukl8Of3//UpfJFI9bkTF4E6s5UC9jqJcx1MsY6mUM9bo0h8PhejNmTXirqNlQM2Oqql4Oh6PM/61f6k2shr9O2Gw2DR48WElJScrKytJVV12lq666StnZ2Xrrrbc0ePDgci1NsVqtOnz4sBwO94IUB+527doZnVqpY2RlZen06dNVNgYAAEBNNH36S7r77oGun3/66Uf16dNDa9euNtzXiMWLF+qTT/6vRPv8+XPVp0+PCh3zcuzZs0t9+vTQnj27qn3sqmI4wL/xxhs6cuSInn/+eX366adatmyZli1bph07duiFF17Qd999p8TExEseJyYmRna7XVu3bnVrT0lJUVhYmKxWq9GplXDTTTfJ6XRq9Wr3X9SVK1eqYcOGioyMvOwxAAAAzCAoKFhz5izQddddX6XjLF26UDt2/F+J9oEDB2nOnAVVOvb/CsNLaLZu3arBgwdrxIgRbu3e3t4aPny4Dhw4oM2bN2vKlCkXPU5UVJQiIyM1efJk2Ww2hYaGKiUlRbt371ZSUpJrv5EjRyotLc3tTas5OTlKS0uTJB09elSStGHDBklS8+bNFRERIenCFfa4uDjNnDlTTqdTHTp00LZt27R69Wq98MIL5brZFgAA4L+Br6+vOnaM8NgSmsaNm6hx4ybVPu5/I8MBvnjZTFk6dOiglStXXvI4FotFSUlJmjlzpmbNmiW73S6r1arExET169fvon3T09M1YcIEt7bin2NjY91eBDV16lQ1adJECxcuVFZWllq0aKFXXnlFQ4YMueQcAQAAqsP27ds0efIzevPNueratbvbtoUL52nBgreVnLxWwcEh+uKLnVqxYrkOHvyX7Ha7Gjduouuu66MHH3xY9euXvW76p59+1ODBd2jKlJd0660DXO1r167S0qXv6uTJn9S0aTONGHF/qf3nz5+rf/7zMx0/fkxOp0NXXtlSQ4eOVHR0jGuf4iUy69ev1fr1ayVJt902QJMnv6T58+dqwYK39emn/1nK8uuvv2revDnatm2zcnKyFRgYpOjom/XQQ2NVu3Ydt+MOHjxMbdu205IlC3Xq1EmFhl6phx9+tML/opCSskLJye/r+PFjqlev
nnr0iNS4cfFq1uwK1z4HD/5L8+fP0f793yo/P0/+/gFq3/4qPffci67HqK9cuUIpKSt04sRxeXl5q3Hjxrrllts1cuSoCs2rPAwH+ODgYB04cKDM7QcOHFBwcHC5jtWgQQO98MILeuGFF8rcZ/HixSXaIiMj3a7IX4yvr6+eeOIJPfHEE+XaHwAA/G+y70xV1ofJKsrJlk9gkILj7lLDnr2rZezrrrte/v7+Wr9+bYkAv2HDR+rRI1LBwSGSpBMnjqtTpy6644441atXT8ePH9PixQt04MB+JSXNMzTu2rUpmjFjmm644UY99tiTOnPGrvnz56qoqKjEu3pOnTqpuLjBaty4ic6fP689e3bp5Zen6Ndf8zVgwCBJ0pw5CzRx4nh16dJV99//kCQpICCg1LEdDoeefXai9u79WqNGPaQOHa7Wt9/u08KF85SRcUgzZya6PTHw00+369tv9+qhhx5R3bp1tWzZIv3xj09r2bJkNW8eaui8i79M9O8/UOPHP6GsrJ/19ttzNG7cg1q4cJkCAgL1yy+/aOLE8QoPb69JkybLz89PWVmZ+uKLz1VQUCBJ2rRpg/7619c1cuQD6tKlmxwOh44d+0FZWVmG5mOU4QB/4403avny5erQoYOGDBni+st1OBz64IMPlJycrHvuuafSJwoAAFBV7DtTdWrRQjn/HcyKcrJ1atFCSaqWEO/j46ObbrpV69at1sSJk1S3bl1J0jfffKXjx49qzJhHXPsOGnS3689Op1MREZ115ZUtNX78GKWnH1LbtuV7SIfD4dDbb8/RVVddrenT/+QKyx07dtLw4XcpJKSx2/5//OOLbn27d79Gubk2JSe/7wrwHTtGyNvbS/7+AerYMeKi43/++T+1Z88uTZz4jO6660J2vOaanqpXr77eeOMvSkvbqcjI/7wctLCwUH/722xXbcLD22vQoNu0desmjRz5QLnOWZLsdruWLl2kvn37uZ1TePhVevDBe7V8+TKNGxevo0e/l92eq8cfn6iwsP/cmxkTc6vrz3v3fq3WrdvooYfGudquvbZnuedSUYYD/OOPP67U1FRNnTpVb775psLCwiRJ3333nXJycnTllVfqscceq/SJAgAAXIw99TPlfvpJhfqePXJYzqIitzZnQYFOLXxHuZ9sN3SsRn1uUMPe1xmew+23D9SKFf/Q//3fFt1224UlLuvXr5OfX0Ndf32Ua7/s7CwtXrxAn322Q1lZmSosLHRtO3r0+3IH+KNHf1B2dpZGjLjf7Up38+ahiojorJMnf3Lbf8+eXVqyZKHS0w/JZjstp/PCY7V9fWsbPldJ+vLLC0tpbr65v1v7rbferjfe+Iv27NnlFuC7d+/hCu+SFBgYpICAgBLzvJRvv/1GBQXnSozbtm24Wre2up5WExp6pfz8GurVV19WXNwQde7cVVdc0dytT4cOHbVy5Qq9/vqriorqpw4dOqpBg6p/Z5DhAB8QEKDk5GS9/fbb2rx5s/bu3StJatGihe6++26NGTOmWiYOAABQWX4f3i/VXhXatg2X1dpOH320RrfdNkDnzp3Vtm2bFBNzm3x9fSVduPI9ceJ4nT59WqNGPaTWrduobt26OnXqlCZPfkbnzp0r93jF794JCgoqsS0oKMgtGO/bt1cTJ45Xt2499OSTkxQS0lg+Pj5auXKF1q279GMpS2O32+Xr61viJaANGzaUr6+v7Pbc37X7lzhGrVq+ruUsRsaVLnwB+L2goGD9+OOFdxw1aNBAiYl/16JF8/W3v/1FeXln1KxZc8XFDdbQoSNksVh06623q6ioUGvWrNLataskSZ07d9XYsfG6+uqOhuZlRIVe5NSgQQNNnDhREydOLLHtH//4hxYtWqSPPvrosicHAABQXg17X1ehK9+SdGTSUyrKyS7R7hMYpBaTnrvcqZXbbbfdrsTEv+qnn37Uvn3fKC8vT/37/+eG08OHM3TkyGFNnvyS6yq9JOXl5Rkeq/iFltnZJc/7
921bt26Ut7ePEhJmub5MSFJRUeHvu5Zbw4aNVFBQoDNnzriFeLvdroKCAjVsePkv9SxrXEnKKeXvOzs7y23cNm2smj49QYWF55WRka5Vq5L11lt/lZ+fnwYMuFOSNGDAIA0YMEhnz57Vnj27NHfuW3ryyfH64IM1rhtdK1ulvxf29OnT+u677yr7sAAAAFUmOO4uWX4TTCXJ4uur4Li7qnUeN9/cX97e3v9+iss6hYW11lVXXf2fOf17qUutWrXc+q1Zc+knAP7elVe2VFBQsDZt2uDWfuLEce3d+7Vbm8Vikbe3t9uNradP52jHjpJLlmrV8i3XvwR0736NJGnjRveLvsU/F2+vbB07dpKvb+0S42ZkpOvIkYxSx7VYLGrbtp0mTpwkb29vZWSkl9inTp066t27j4YOHaH8/HydPPljlcxfquAVeAAAgP8mxTeqeuopNMUCAgLUs2dvrVr1oWy20xo7Nt5te6tWYbriiuaaM+fCSzPr12+gzZs/1sGD/zI8lpeXl8aMGacZM6bpj398RgMG3Km8vDOaN2+OgoLcnyjYq1cfLV++TFOnTtEdd8QqJydbCxfOU2BgoH75Jd9t39at2+irr/YoNfVTBQYGqlEjf7dHMxa79tqe6tHjWr311t+Ul5enDh2u1v7932rhwnm69tpeuuaaqnnhpp+fn+677wHNmzdHr746Vf36xSgrK1Pz5s1RcHCIhgwZLkn67LMdSklZoaioG9WkSTM5HA5t3LheDofDtTY/IWGaateuo4iIzgoKCtLPP/+sxYsXqEmTpmrVqnWVzF8iwAMAAEi6EOKrO7CXpn//O/Tpp5/I29tbt9xym9s2Hx8fzZgxU3/725+VkDBdvr61dN11N+ill17VQw+NNDxW8dNjlixZpMmTn1HTps10//2j9dVXe/Tll7td+/Xoca0mTZqsZcsW6dlnJ6pJk6YaMmS4cnKytWDB227HjI+fqD//+TVNmfKsCgrOuZ4D/3sWi0UzZszUvHlztGrVh3rnnb8rKChYgwcP1ejRY91urK1so0Y9JH//ACUnL9emTRtUt249XXNNpB555HHXYy9btGihevXqa/HihcrMzJSvr6/CwsL0yisz1KvXhaVanTp10fr1a7Vly0bl5Z2Rv3+AunXrrgcfHOu21KiyWZzFtxBXktmzZ+uNN9646LPizSw7O08OR6WWrFxCQvyUmXmm2sc1K+plDPUyhnoZQ72MoV6XdvLkD2ratKUkeeytomZGzYypqnr99vf497y8LAoKKvuhMJW+Bh4AAABA1SnXEpoFCxaU+4B79uyp8GQAAAAAXFy5AnxCQoKhg1blmiUAAADgf1m5AvyiRYuqeh4AAAAAyqFcAf7aa6+t6nkAAAAAKAduYgUAAABMhAAPAABMp5Kfgg1Uq8v9/SXAAwAAU/H2rqXCwnOengZQYYWFBfL2rvj7VAnwAADAVBo0aCSbLUv5+WdUVFTE1XiYhtPpVEHBOdlsmWrQwL/Cx6l49AcAAPCAunXry8enlvLybDp3Lk8FBQWenpKpeHl5yeHgTazlVdn18vb2kZ9fgOrWrV/hYxDgAQCA6dSq5auAgMYKCfFTZuYZT0/HVKiZMTWxXiyhAQAAAEyEAA8AAACYCAEeAAAAMBECPAAAAGAiBHgAAADARAjwAAAAgIkQ4AEAAAATIcADAAAAJkKABwAAAEyEAA8AAACYCAEeAAAAMBECPAAAAGAiBHgAAADARAjwAAAAgIkQ4AEAAAATIcADAAAAJuLRAJ+fn69p06apT58+6tSpk+Li4rRly5Zy9T169KgeffRRde/eXV27dtWYMWOUkZFRYr+TJ0/qqaeeUmRkpGuMTZs2VfapAAAAANXCowE+Pj5ea9as0YQJEzR37lxZrVbFx8dr+/btF+2XnZ2t4cOH68SJE0pISNDMmTOVm5ure++9VydPnnTtl5ubq2HDhmnXrl169tlnlZiYqDZt2uixxx7Thg0bqvr0AAAA
gErn46mBt2/frtTUVCUmJiomJkaS1LNnTx07dkwzZsxQVFRUmX3nz58vu92u5ORkNWnSRJLUpUsXRUdHa/bs2Zo6daok6b333tNPP/2kDz/8UB06dJAk3XDDDTp58qRmzJihm2++WV5erCICAACAeXgsvW7atEl+fn6Kjo52tVksFsXGxurIkSOlLocptnnzZvXu3dsV3iUpICBAN954o9vymK+++kqNGzd2hfdi0dHR+umnn/T1119X4hkBAAAAVc9jAT49PV1Wq7XEFfDw8HBJ0qFDh0rtd/bsWR09elTt2rUrsS08PFzZ2dnKzs6WJBUWFsrX17fEfrVq1XLNAQAAADATjwV4m82mRo0alWgvbrPZbKX2y83NldPpLLWvv7+/W982bdroxx9/1KlTp9z227NnjyTp9OnTFT8BAAAAwAM8tgZeurBkpiLbyrNdku655x699957euqpp/Tiiy8qODhYa9eu1ccff1zuY/xeUFADw30qS0iIn8fGNiPqZQz1MoZ6GUO9jKFexlAv46iZMTWtXh4L8P7+/qVeZc/NzZWkUq+wF7dbLJZS+xa3FV+Jb9OmjRITE/Xiiy9qwIABkqRmzZrpD3/4g1555RU1btzY8Lyzs/PkcDgN97tcISF+ysw8U+3jmhX1MoZ6GUO9jKFexlAvY6iXcdTMGE/Uy8vLctGLxh4L8FarVRs3bpTD4XBbB1+89r20Ne6SVKdOHbVo0aLUNfKHDh1SYGCggoKCXG1RUVHatm2bfvjhB50/f16tWrXSRx99JIvFou7du1fyWQEAAABVy2Nr4GNiYmS327V161a39pSUFIWFhclqtZbZ96abblJqaqoyMzNdbTabTdu2bXM9kvK3LBaLWrVqpTZt2uj8+fN69913FRUVpRYtWlTeCQEAAADVwGNX4KOiohQZGanJkyfLZrMpNDRUKSkp2r17t5KSklz7jRw5UmlpaTp48KCrbfTo0Vq9erUefvhhjR8/Xj4+Ppo9e7Z8fHw0btw4134Oh0Ovvvqqrr32WjVq1EhHjx7VokWLdObMGb355pvVer4AAABAZfBYgLdYLEpKStLMmTM1a9Ys2e12Wa1WJSYmql+/fhftGxwcrKVLlyohIUGTJk2S0+lU9+7dtWTJEl1xxRVu+x4/flwbNmyQzWZTYGCg+vbtq8cee0whISFVeXoAAABAlbA4nc7qvyPTxLiJ1RyolzHUyxjqZQz1MoZ6GUO9jKNmxtTEm1g9tgYeAAAAgHEEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiXg0wOfn52vatGnq06ePOnXqpLi4OG3ZsqVcfY8ePapHH31U3bt3V9euXTVmzBhlZGSU2C8zM1NTp05VdHS0OnXqpH79+umFF17QqVOnKvt0AAAAgCrn48nB4+PjtX//fj399NMKDQ3VypUrFR8frzlz5igqKqrMftnZ2Ro+fLiCgoKUkJAgb29vzZ49W/fee69SUlLUtGlTSVJBQYHuvfde5ebm6vHHH1ebNm10+PBhvfHGG9q5c6fWrl0rX1/f6jpdAAAA4LJ5LMBv375dqampSkxMVExMjCSpZ8+eOnbsmGbMmHHRAD9//nzZ7XYlJyerSZMmkqQuXbooOjpas2fP1tSpUyVJX375pb7//ntNmzZNgwcPliRFRkaqVq1amjJlir788ktFRkZW8ZkCAAAAlcdjS2g2bdok
Pz8/RUdHu9osFotiY2N15MiRUpfDFNu8ebN69+7tCu+SFBAQoBtvvFGbNm1ytfn4XPh+4ufn59a/+GeuvgMAAMBsPBbg09PTZbVa5eXlPoXw8HBJ0qFDh0rtd/bsWR09elTt2rUrsS08PFzZ2dnKzs6WdOGqfKdOnZSYmKi9e/cqPz9fe/fuVWJioq655hp17ty5ks8KAAAAqFoeC/A2m02NGjUq0V7cZrPZSu2Xm5srp9NZal9/f3+3vt7e3lq4cKFatmypu+++W926ddPdd9+tpk2bau7cuSW+PAAAAAA1nUdvYrVYLBXaVp7tklRYWKinnnpK6enpevXVV9WyZUsdPnxYiYmJevTRRzVv3jzVqlXL0JyDghoY2r8yhYT4XXonuFAvY6iXMdTLGOplDPUyhnoZR82MqWn18liA9/f3L/Uqe25uriSVeoW9uN1isZTat7it+Ep8cnKytm3bplWrVql9+/aSpB49eigsLEwjR47UunXrNGjQIEPzzs7Ok8PhNNSnMoSE+Ckz80y1j2tW1MsY6mUM9TKGehlDvYyhXsZRM2M8US8vL8tFLxp7bA2J1WrV4cOH5XA43NqL176XtsZdkurUqaMWLVqUukb+0KFDCgwMVFBQkCRp//79qlWrliu8F+vYsaMkXfRGWQAAAKAm8liAj4mJkd1u19atW93aU1JSFBYWJqvVWmbfm266SampqcrMzHS12Ww2bdu2zfVISklq3LixCgsLtX//frf+X331lSS5PcUGAAAAMAOPLaGJiopSZGSkJk+eLJvNptDQUKWkpGj37t1KSkpy7Tdy5EilpaXp4MGDrrbRo0dr9erVevjhhzV+/Hj5+Pho9uzZ8vHx0bhx41z7xcXFaeHChYqPj9cjjzyiFi1a6PDhw0pKSlJwcLAGDBhQrecMAAAAXC6PBXiLxaKkpCTNnDlTs2bNkt1ul9VqVWJiovr163fRvsHBwVq6dKkSEhI0adIkOZ1Ode/eXUuWLNEVV1zh2u+KK67QBx98oMTERM2ePVtZWVkKCQlRVFSU4uPjFRAQUNWnCQAAAFQqi9PprP47Mk2Mm1jNgXoZQ72MoV7GUC9jqJcx1Ms4amYMN7ECAAAAuCwEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiRDgAQAAABMhwAMAAAAmQoAHAAAATIQADwAAAJgIAR4AAAAwEQI8AAAAYCIEeAAAAMBECPAAAACAiXg0wOfn52vatGnq06ePOnXqpLi4OG3ZsqVcfY8ePapHH31U3bt3V9euXTVmzBhlZGS47fPhhx8qPDy8zP+sW7euKk4LAAAAqDI+nhw8Pj5e+/fv19NPP63Q0FCtXLlS8fHxmjNnjqKiosrsl52dreHDhysoKEgJCQny9vbW7Nmzde+99yolJUVNmzaVJPXt21fLly8v0X/69Ok6ePCgrr/++io7NwAAAKAqeCzAb9++XampqUpMTFRMTIwkqWfPnjp27JhmzJhx0QA/f/582e12JScnq0mTJpKkLl26KDo6WrNnz9bUqVMlSYGBgQoMDHTrm52drQMHDuiWW275//buPS6qMv8D+GcQEW+IXIxSQlZ2QBSF1UJAJRWKNfNCKpuKN9IM1FZflr5eXl7tK3ul6WoJohZGmq65KxeNNRUUf2qYlqmZpmC4ArauXBxIARHm/P6wOc7hnMHhzoHP+49ynvM85zzPdx7OfJ8zZ2ZgY2PTSKMjIiIiImoczXYLTWpqKrp27YpRo0aJZRqNBhMmTEB2drbsdhhjaWlp8Pf3F5N3
AOjevTtGjBiB1NTUGo+bnJyMhw8fYuLEifUfBBERERFRE2u2K/BZWVlwc3ODhYV0DeHu7g4AyMzM791L0QAAGoVJREFUhJubm6xdeXk5cnJyEBISItvm7u6OlJQUFBYWwt7eXvG4iYmJ6NmzJ4YMGdIAo2h8py/fRuL//YKikgews+mA0MA+8Ovn1NzdIiJ6Ip6/iIgaR7Ml8DqdDr1795aVd+vWTdyupLi4GIIgiPWM2draim2VEvgLFy7g+vXrWLBgATQaTT163zROX76NHV9fRUWlHgBQWPIAO76+CgB8ESSiFo3nLyJSu5Z8EaJZP8RaUxL9pAS7Lgl4QkICLCwsEBoaWuu2Bvb2XerctraST50WX/wMKir1+OzfPyP1+zwYh0ADjeEfj/4n2QZJoXHkNGIzeQONWEcea7FdDduM96v0dMmOrdBnjcJAzOqX0c5qPLaZ+6jp2BpZHYVjP2lsJo6t3HfTY6u5X/J6kuYaM7aZ2y8T/ZPsv6ZtZh67+tw2/mf18Zvbf6W/D8ja/a+GbeYdWzpGM7aZ1S+l2CvERiHA5hy7pnOOdB8a7D12XfH8tffYdfRw6AINjOaMBpLHGs3v+zc6hHGZ8nZpW40GNW43ag6NRiM5J2gU+iS2lWzXSGOisO1J2yXbfj/mvbKH6NTF2nTb3/+tMRSI43r0DzFeKrhQ1VAcHbs2dxdUhzGr2fFzudh56BoePKwC8OgixM5D12DT1RovDHJu5t41YwJva2ureJW9uLgYABSvsBvKNRqNYltDmeFKvLGysjIcPHgQfn5+eOaZZ+rc78LCe9DrhTq3r438u2WK5VV6Ad27WImPhWrdEYwKqvfUuK5g2Copk1aU/Ne4niA9jvL+lfZVfVu1vkjKlPar0BeFfVlatkNlZZV5+5IMQFq/PmNUGpv8uZI3EEzUrd4f+bjlOzZ3XxqNBoIgiPWk1aUxf9IY5TE0vS+FLj9xLpC6ldyvwPvxZ5u7G22O0sLBsACQLN6qLXAkiwRAstAx7MN4Ia20+JEuDqXHr74Qg2RxYrRwMdqvYR+GY7Vv3w6VD/WyPhvXMV5wakwcS3FcRmP6fa+S8ShdkKi+CITRGEwtFOULsRqeB4W+aSAfo/Lz/aisUycrlJVVmFhMQtxbjc+DwnMl9tIo5sb9lW5XjrfJOqaeh2qxetxHhYV59TEqxNtwmM8O/iwm7wYPHlbh85TL6PesPM9saBYWmhovGjdbAu/m5oYjR45Ar9dL7oPPzMwEAGi1WsV21tbWcHZ2FusZy8zMhJ2dneLtM4cPH8a9e/dU9eFVe5sOKCx5oFi+4NUBzdAj9XB07Ir8/N+auxuqocZ4CWYsmmpaKEBhYWXuvhwcuqCg4J7yvmTHMbfP8obmLsCqN61pcV7TQk+xfo3HMb2vv395AcX3K2Rtu3W2wl8nDZTFVRB+77fw+z6MxvK4jmBU9/FxBTxuI9mHuH/hcRujOobDGNd5XCadH9X7ZtwvxTrG/RXbGPVX0laQ1OncuQPu3SuXX3ARpH2TPA/V9iHvf7UxKsSh+rjEOpL+/17HuEw2Lmm85f0XS6o9D9WeX8FoOW9q7IKA9laWqKioVH4ejPZhah4J+sfP0+O21fsrH5fJufak50HyXMnnkWEfxv2tHofq86im51vpeQAAfbV5rxg/klHKy5pDsyXwwcHB2LdvH44dO4agoCCxPDk5Ga6uroofYDUICgrC7t27kZ+fD0dHRwCPrr6np6fj5ZdfVmyTkJAAW1tbybFautDAPpJ7SAHAytICoYF9mrFXRC1DTbfbVCttcJ2s26Njh2a9A7HFmzzSTfH8NXmkG1yc+NZ9TdS4oG5OjFft1SZm8gWEdJFXffFrqo7hv7KFidGiRKmOYVP1xaTSwlXeRlrn8f6rLQQVFjKbEn5EicJFCHubDjVEq+k02ytQYGAgfH19sXz5cuh0OvTq1QvJyck4d+4cYmNjxXrh4eE4e/Ysrl27JpZFRETgwIED
mDt3LqKiomBpaYktW7bA0tIS8+bNkx0rNzcX3333HaZOnQorKyvZ9pbK8EGJlvoBCiIiU3j+Imodqt8+09gXSFqKMBMXIVrKRdRmS+A1Gg1iY2OxYcMGbNy4ESUlJXBzc0NMTAxGjhxZY1sHBwfs3r0ba9euxTvvvANBEDBo0CDs2rVL8f72hIQECIKAV199tbGG02j8+jnBr58TrzAQkerw/EVEatXSL0JoBKUbHMmkpvwQqzG+ANYO41U7jFftMF61w3jVDuNVO4xX7TFmtdMc8XrSh1ib7ZdYiYiIiIio9pjAExERERGpCBN4IiIiIiIVYQJPRERERKQiTOCJiIiIiFSECTwRERERkYowgSciIiIiUhEm8EREREREKsIEnoiIiIhIRSybuwNqY2GhaZPHViPGq3YYr9phvGqH8aodxqt2GK/aY8xqp6nj9aTjaQRBEJqoL0REREREVE+8hYaIiIiISEWYwBMRERERqQgTeCIiIiIiFWECT0RERESkIkzgiYiIiIhUhAk8EREREZGKMIEnIiIiIlIRJvBERERERCrCBJ6IiIiISEUsm7sDbcX9+/exceNGHDp0CCUlJXBzc0NUVBRGjRr1xLY5OTlYs2YNzpw5A71ej8GDB2Pp0qVwc3OT1d25cyd2796NW7duwcnJCWFhYYiIiICFhbrWanWN17/+9S8cPXoU165dQ2FhIZycnDB8+HBERkbCzs5OUtfd3V1xH++++y5ee+21BhtLU6hrvKKjoxETEyMrd3BwwDfffCMrb+vza+TIkbh165biNldXVxw6dEh83Jrm1+3btxEXF4fLly/j6tWrKC0txc6dO+Hr62tW+59++gnr1q3DxYsX0b59ewwdOhTLli3DU089Jan38OFDbNmyBUlJScjPz4eLiwtmzpyJSZMmNcawGk1d41VVVYUdO3bg1KlTyMrKQklJCZ555hmEhIQgIiICXbp0Eevm5eWZnK+ffvophg8f3qBjakz1mV/Lli1DUlKSrHzgwIH45z//KSlr6/MLMH1eAoChQ4di+/btAFrP/Dp9+jT279+P8+fP4/bt2+jWrRsGDBiABQsW1BgLg5acfzGBbyLz58/HlStXsGTJEvTq1QtJSUmYP38+tm7disDAQJPtCgsLMWXKFNjb22Pt2rVo164dtmzZgmnTpiE5ORlOTk5i3djYWERHR2PevHkYMmQIzp8/j48++gjFxcVYsmRJUwyzwdQ1Xps2bYKvry8WL16Mp556CtevX8fmzZtx7NgxJCcnw8bGRlJ/9OjRmDFjhqTM2dm5UcbUmOoaL4P4+Hh06tRJfNy+fXtZHc4vICYmBhUVFZKyzMxMrFy5EkFBQbL6rWV+3bx5E//+97/h6emJIUOG4NixY2a3/eWXXxAeHg4vLy98/PHHKCsrw8aNGxEeHo6kpCR07txZrPvuu+8iJSUFixYtQt++fXH8+HGsWLEClZWVqlr01DVe5eXliImJwZgxYzB58mR0794dly5dQmxsLE6cOIG9e/fC0lL6sj1jxgyMHj1aUtanT58GG0tTqM/8AoBOnTohPj5eUmY8rwza+vwCgL1798rKTp06hejoaMVzmNrn1549e6DT6TBz5kz06dMHBQUFiIuLw8SJE/HFF1/A29vbZNsWn38J1OiOHz8uaLVa4ciRI2KZXq8X/vKXvwghISE1tl27dq3g5eUl3L59WywrKioSfHx8hFWrVknKvLy8hPfee0/SfsOGDYKnp6fw3//+t4FG0/jqE6+CggJZ2ZkzZwStVivs3LlTUq7VaoXVq1c3TKebUX3itWnTJkGr1QrFxcU11uP8Mu29994TtFqtkJ2dLSlvLfNLEAShqqpK/Hdqaqqg1WqFb7/91qy2CxcuFAICAoT79++LZdevXxc8PDyEbdu2iWWZmZmCVqsV4uPjJe0XL14sPPfcc0J5eXn9BtGE6hqvyspKoaioSFaemJgoaLVa4ejRo2JZbm6uYrzUqD7za+nS
pcKgQYOeWI/zy7Q33nhDGDBggFBSUiKWtZb5pZQTFBcXC4MHDxbmz59fY9uWnn+p631vlUpNTUXXrl0lb0dpNBpMmDAB2dnZuH79usm2aWlp8Pf3l7zV3L17d4wYMQKpqali2cmTJ/HgwQNMmDBB0n7ChAmorKzE0aNHG3BEjas+8bK3t5eVeXl5AXj0tmNrVJ94mYvzS1lFRQW++uorDBo0CK6uro3R3Rahrm8BP3z4EMePH0dISIjkHZ4+ffpg4MCBOHLkiFiWlpYGjUaDsWPHSvYRGhqK4uJifPvtt3XrfDOoa7zatWuH7t27y8pb+zmsKW7B4/xSVlBQgJMnT+LFF19E165dG2y/LYVSTmBjYwMXF5cn/j219PyLCXwTyMrKgpubm+yPznD/VWZmpmK78vJy5OTkQKvVyra5u7ujsLAQhYWF4jE0Gg3++Mc/Sur17t0b1tbWyMrKaoihNIm6xssUw4m5emwAYP/+/RgwYAC8vLwwadIkHDx4sI69bj4NEa/Ro0ejb9++GDp0KFasWCHOK+NjcH7JpaWlQafT4dVXX1Xc3hrmV33k5uaivLxc8W/P3d1dMm+ysrLg4OBg8rMqtf27b00M5zCl14KtW7eif//+8Pb2Rnh4OE6fPt3U3Wt2paWl8Pf3R9++fTFixAisWbMG9+/fl9Th/FKWlJSEyspKk+ew1ji/ioqKkJWVpXheMlBD/sV74JuATqdD7969ZeXdunUTtyspLi6GIAhiPWO2trZiW3t7e+h0OnTs2BFWVlayujY2NiaP0RLVNV6m9rV69Wr07t1bdh/fK6+8gsDAQDz99NO4c+cO9uzZg0WLFiE/P19233JLVp94OTs7Y/Hixejbty/at2+PH374AXFxcTh9+jQSExMl++D8kktISECnTp3w5z//Wbattcyv+jDE0tQ5rLy8HOXl5bC2toZOpxPPa8bq8ry0Jrm5udi0aROef/55DB48WCy3srLC5MmTERAQAAcHB+Tl5SE+Ph6zZs1CdHQ0goODm7HXTcfDwwMeHh7QarWoqqpCRkYGvvjiC3z//ffYs2eP+Hkezi9liYmJcHZ2ln0AtrXOL0EQsHLlSuj1ekRERJisp4b8iwl8E9FoNHXaZs72+h6/JapPvAzKysoQFRWF4uJi7Nq1S/bHtX79esnjkJAQhIeH46OPPkJYWBisra1r3/FmUtd4jR8/XvLYz88P3t7emD17Nnbv3o3IyMh6H78laoj5dfv2bWRkZCA0NFRye4hBa5pf9WVuvJXqGcrUNscaQlFREebOnYuOHTti3bp1km09evTAe++9Jz4ePHgwXnrpJYwfPx4ffvihahOs2po5c6bk8bBhw+Dq6oqVK1fi4MGDGDdunLiN80vqhx9+QHZ2Nt566y3Z+Fvr/Prwww+RlpaGDz74wKwP47bk/Iu30DQBW1tbxRVYcXExAOWrU4ZyjUaj2NZQZlgJ2traoqysTPYNGQBQUlJi8hgtUV3jZay8vBxvvvkmrly5gk8++QQeHh5PbGNhYYGxY8eitLRUVW+nNkS8jAUEBMDR0REXLlyQHIPzSyoxMRF6vd7kW8/VqXV+1YfxlarqdDodrK2t0aFDB7Hu3bt3FesBtZ/Hanf37l3MnDkTv/32Gz7//HPJN16Y0rFjR7z00kvIyclBUVFRE/SyZRo7diwsLCxk5zDOL6mEhARYWFggNDTUrPpqn18bN27EZ599huXLlz9xzGrIv5jANwE3Nzf88ssv0Ov1knLDi7jSPVYAYG1tDWdnZ8UX+8zMTNjZ2Ykf0HBzc4MgCLJ7rW7evGnyHtSWqq7xMnjw4AEiIyNx4cIFbNu2DX/605/MPrbhmGr6XvP6xkuJIAiSGHB+SQmCgKSkJPzhD39o9fOrPpydnU3eA5qZmSmZN25ubigoKJAlWfWZx2pl+Nq7/Px8xMfH1+oD0oY51havKBsIggAAsnMY59djpaWl+PrrrxEQEGDW4tBArfPr448/xtat
W/H2229j+vTpT6yvhvyrbbyKNLPg4GCUlJTIvqs1OTkZrq6uij8IYBAUFISMjAzk5+eLZTqdDunp6ZK3sIYPHw4rKyvs379f0j4pKQmWlpYYOXJkA42m8dUnXhUVFYiMjMT333+P2NhYPP/882YfV6/X46uvvkLnzp1VlZDWJ15KTp06hYKCAgwcOFAs4/ySOnv2LHJycsy++g6od37VR/v27REYGIjDhw+jrKxMLL9x4wYuXLiAF198USwLCgqCIAg4cOCAZB9JSUmwsbEx+0ej1K64uBizZs3C7du3ER8fX6u5UlZWhiNHjsDFxUXx22zaigMHDkCv10vOYZxfUocOHcL9+/drdQ5T6/yKiYlBbGws3nrrLbz++utmt2vp+RfvgW8CgYGB8PX1xfLly6HT6dCrVy8kJyfj3LlziI2NFeuFh4fj7NmzuHbtmlgWERGBAwcOYO7cuYiKioKlpSW2bNkCS0tLzJs3T6zXvXt3vPHGG4iNjUXXrl3h6+uLCxcuIC4uDtOnT8fTTz/dpGOuj/rEa+HChTh16hSioqLQqVMnyVuodnZ2ePbZZwEA27dvx40bNzBkyBA4OjqioKAAe/bswblz57Bq1SrxbX01qE+8xo8fj/Hjx8PV1RWWlpY4f/48tm/fDhcXF0ydOlWsx/kllZCQAEtLS9lnCAxa0/wyMPzK7KVLlwAA3333He7evYuOHTuKP35leKEyXhwtXLgQkyZNwptvvonZs2eLP+TUs2dPTJkyRayn1WoRGhqKDRs2QBAEeHp6Ij09HQcOHMCqVatU95mBusSrvLwcERERuHr1KlasWIHy8nLJOczJyUm8WrpmzRro9Xr4+PjAzs4Ot27dwueff47c3Fxs3ry5ycbZUOoSr1u3buGdd97Byy+/jGeffRZVVVU4ffo0du3aBR8fH8kXF3B+SSUkJMDW1tbkr622lvn12WefITo6GiNGjIC/v7/k78nKygqenp4A1Jl/aQTDe03UqO7du4cNGzbg8OHDkp9uN/7lM1MJw3/+8x+sXbsWZ86cgSAIGDRoEJYuXSq7MiMIAnbs2IF//OMf+PXXX9GjRw+EhYVhzpw5qnvLvq7xqumnkSdMmIA1a9YAeHRCi4uLQ3Z2Nn777Td07NgR/fr1w4wZM1R1NdmgrvFavHgxfvrpJ9y5cweVlZVwcnLCyJEjERkZKfvGBs6vx22HDh0KPz8/bNmyRXH/rW1+Aab/tnr27CkmCKYShh9//BHr16/Hjz/+CEtLSwQEBGDZsmWyF7aKigrExsYiOTkZBQUFcHZ2xqxZszB58uRGGFHjqku8avr5euDRLwgvWLAAALBv3z7s3bsXN2/exP3799GlSxf4+Phgzpw5GDRoUEMOpUnUJV7FxcVYvnw5rly5goKCAgiCAGdnZ4SEhGDu3LmypLytzy+DnJwcBAcHIzw8HCtWrFDcT2uZX4bzuBLjWKkx/2ICT0RERESkIuq6bEZERERE1MYxgSciIiIiUhEm8EREREREKsIEnoiIiIhIRZjAExERERGpCBN4IiIiIiIVYQJPREQtXnh4uGq/Q5+IqKHxl1iJiNqoM2fOYPr06Sa3t2vXDleuXGnCHhERkTmYwBMRtXFjxozB8OHDZeVq+4VdIqK2ggk8EVEb5+npiXHjxjV3N4iIyEy8vEJERDXKy8uDu7s7oqOjkZKSgldeeQVeXl544YUXEB0djcrKSlmbq1evIioqCr6+vvDy8sLo0aPx6aefoqqqSlY3Pz8fq1evxqhRo9C/f3/4+flh1qxZ+Oabb2R1//e//2Hx4sV47rnn4O3tjYiICNy4caNRxk1E1FLxCjwRURtXVlaGoqIiWbmVlRW6dOkiPk5PT8eOHTswdepUODg44NixY4iJicGvv/6KDz74QKx36dIlhIeHw9LSUqybnp6O9evX4+rVq/j73/8u1s3Ly8Nrr72GwsJCjBs3Dv3790dZWRkuXryIjIwMBAQEiHVLS0sxbdo0
DBw4EIsWLUJeXh527tyJyMhIpKSkoF27do0UISKiloUJPBFRGxcdHY3o6GhZ+QsvvIBt27aJj3/++Wfs27cP/fr1AwBMmzYN8+fPR2JiIsLCwuDt7Q0AeP/991FRUYEvv/wSHh4eYt2//vWvSElJwcSJE+Hn5wcA+Nvf/oY7d+4gLi4Ow4YNkxxfr9dLHt+9excRERGYM2eOWGZnZ4d169YhIyND1p6IqLViAk9E1MaFhYUhJCREVm5nZyd57O/vLybvAKDRaPD6668jLS0Nqamp8Pb2RmFhIc6fP4/g4GAxeTfUnTdvHg4dOoTU1FT4+flBp9Ph5MmTGDZsmGLyXf1DtBYWFrJvzRkyZAgA4ObNm0zgiajNYAJPRNTGubi4wN/f/4n1+vTpIytzc3MDAOTm5gJ4dEuMcXn19hYWFmLdnJwcCIIAT09Ps/rZo0cPdOjQQVJma2sLANDpdGbtg4ioNeCHWImIyCwajeaJdQRBMHt/hrrm7BdAjfe41+a4RERqxwSeiIjMcv36dZNlzs7Okv8r1c3OzoZerxfruLi4QKPR8MeiiIhqiQk8ERGZJSMjA5cvXxYfC4KAuLg4AEBQUBAAwN7eHj4+PkhPT0dmZqak7ieffAIACA4OBvDo9pfhw4fjxIkTyMjIkB2PV9WJiJTxHngiojbuypUr2L9/v+I2Q2IOAB4eHpgxYwamTp0KR0dHHD16FBkZGRg3bhx8fHzEesuXL0d4eDimTp2KKVOmwNHREenp6Th16hTGjBkjfgMNAKxcuRJXrlzBnDlzMH78ePTr1w8PHjzAxYsX0bNnT7z99tuNN3AiIpViAk9E1MalpKQgJSVFcduRI0fEe89HjhwJV1dXbNu2DTdu3IC9vT0iIyMRGRkpaePl5YUvv/wSmzZtwp49e1BaWgpnZ2csWbIEs2fPltR1dnZGQkICNm/ejBMnTmD//v2wsbGBh4cHwsLCGmfAREQqpxH4HiUREdUgLy8Po0aNwvz587FgwYLm7g4RUZvHe+CJiIiIiFSECTwRERERkYowgSciIiIiUhHeA09EREREpCK8Ak9EREREpCJM4ImIiIiIVIQJPBERERGRijCBJyIiIiJSESbwREREREQqwgSeiIiIiEhF/h8cySJ2GoM4pQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 864x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "import seaborn as sns\n",
    "\n",
    "# Use plot styling from seaborn.\n",
    "sns.set(style='darkgrid')\n",
    "\n",
    "# Increase the plot size and font size.\n",
    "sns.set(font_scale=1.5)\n",
    "plt.rcParams[\"figure.figsize\"] = (12, 6)\n",
    "\n",
    "# Plot the learning curve.\n",
    "plt.plot(loss_values, 'b-o', label=\"training loss\")\n",
    "plt.plot(validation_loss_values, 'r-o', label=\"validation loss\")\n",
    "\n",
    "# Label the plot.\n",
    "plt.title(\"Learning curve\")\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Loss\")\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> - 模型训练轮次（epoch）太少\n",
    "> - 模型很快稳定下来"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 使用模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:12:54.337363Z",
     "start_time": "2020-05-28T02:12:54.331121Z"
    }
   },
   "outputs": [],
   "source": [
    "test_sentence = \"\"\"\n",
    "Mr. Trump’s tweets began just moments after a Fox News report by Mike Tobin, a \n",
    "reporter for the network, about protests in Minnesota and elsewhere. \n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:13:10.729261Z",
     "start_time": "2020-05-28T02:13:10.715598Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[  101,  1828,   119,  8499,   787,   188,   189, 24887,  1116,  1310,\n",
      "          1198,  4899,  1170,   170,  3977,  3128,  2592,  1118,  2639,  1706,\n",
      "          7939,   117,   170,  6672,  1111,  1103,  2443,   117,  1164,  7853,\n",
      "          1107,  4332,  1105,  6890,   119,   102]], device='cuda:0')\n"
     ]
    }
   ],
   "source": [
    "tokenized_sentence = tokenizer.encode(test_sentence)\n",
    "input_ids = torch.tensor([tokenized_sentence]).cuda()\n",
    "print(input_ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:13:26.293023Z",
     "start_time": "2020-05-28T02:13:26.279954Z"
    }
   },
   "outputs": [],
   "source": [
    "with torch.no_grad():\n",
    "    output = model(input_ids)\n",
    "label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:13:47.479519Z",
     "start_time": "2020-05-28T02:13:47.476496Z"
    }
   },
   "outputs": [],
   "source": [
    "# join bpe split tokens\n",
    "tokens = tokenizer.convert_ids_to_tokens(input_ids.to('cpu').numpy()[0])\n",
    "new_tokens, new_labels = [], []\n",
    "for token, label_idx in zip(tokens, label_indices[0]):\n",
    "    if token.startswith(\"##\"):\n",
    "        new_tokens[-1] = new_tokens[-1] + token[2:]\n",
    "    else:\n",
    "        new_labels.append(tag_values[label_idx])\n",
    "        new_tokens.append(token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-05-28T02:13:56.083457Z",
     "start_time": "2020-05-28T02:13:56.076509Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "O\t[CLS]\n",
      "B-per\tMr\n",
      "B-per\t.\n",
      "I-per\tTrump\n",
      "O\t’\n",
      "O\ts\n",
      "O\ttweets\n",
      "O\tbegan\n",
      "O\tjust\n",
      "O\tmoments\n",
      "O\tafter\n",
      "O\ta\n",
      "B-org\tFox\n",
      "I-org\tNews\n",
      "O\treport\n",
      "O\tby\n",
      "B-per\tMike\n",
      "I-per\tTobin\n",
      "O\t,\n",
      "O\ta\n",
      "O\treporter\n",
      "O\tfor\n",
      "O\tthe\n",
      "O\tnetwork\n",
      "O\t,\n",
      "O\tabout\n",
      "O\tprotests\n",
      "O\tin\n",
      "B-geo\tMinnesota\n",
      "O\tand\n",
      "O\telsewhere\n",
      "O\t.\n",
      "O\t[SEP]\n"
     ]
    }
   ],
   "source": [
    "for token, label in zip(new_tokens, new_labels):\n",
    "    print(\"{}\\t{}\".format(label, token))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
